home *** CD-ROM | disk | FTP | other *** search
/ Tech Arsenal 1 / Tech Arsenal (Arsenal Computer).ISO / tek-01 / snip0493.zip / BMHASRCH.C < prev    next >
C/C++ Source or Header  |  1993-04-05  |  3KB  |  102 lines

  1. /*
  2. **  Boyer-Moore-Horspool pattern match
  3. **  Case-insensitive with accented character translation
  4. **
  5. **  public domain by Raymond Gardner 7/92
  6. **
  7. **  limitation: pattern length + subject length must be less than 32767
  8. */
  9.  
  10. #include <stddef.h>
  11. #include <string.h>
  12.  
  13. typedef unsigned char uchar;
  14.  
  15. #define LOWER_ACCENTED_CHARS
  16.  
  17. unsigned char lowervec[256] = {
  18.   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  19.  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  20.  32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
  21. '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
  22. '@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  23. 'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
  24. '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  25. 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,
  26. #ifdef LOWER_ACCENTED_CHARS
  27. 'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a',
  28. 'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159,
  29. 'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175,
  30. #else
  31. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  32. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  33. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  34. #endif
  35. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  36. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  37. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  38. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  39. 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  40. };
  41.  
  42. #define lowerc(c) lowervec[(uchar)(c)]
  43.  
  44. #define LARGE 32767
  45.  
  46. static int patlen;
  47. static int skip[256];
  48. static int skip2;
  49. static uchar *pat;
  50.  
  51. void bmh_init(const char *pattern)
  52. {
  53.       int i, j;
  54.       pat = (uchar *)pattern;
  55.       patlen = strlen(pattern);
  56.       for (i = 0; i < 256; ++i)
  57.       {
  58.             skip[i] = patlen;
  59.             for (j = patlen - 1; j >= 0; --j)
  60.             {
  61.                   if (lowerc(i) == lowerc(pat[j]))
  62.                         break;
  63.             }
  64.             if (j >= 0)
  65.                   skip[i] = patlen - j - 1;
  66.             if (j == patlen - 1)
  67.                   skip[i] = LARGE;
  68.       }
  69.       skip2 = patlen;
  70.       for (i = 0; i < patlen - 1; ++i)
  71.       {
  72.             if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) )
  73.                   skip2 = patlen - i - 1;
  74.       }
  75. }
  76.  
  77. char *bmh_search(const char *string, const int stringlen)
  78. {
  79.       int i, j;
  80.       char *s;
  81.  
  82.       i = patlen - 1 - stringlen;
  83.       if (i >= 0)
  84.             return NULL;
  85.       string += stringlen;
  86.       for ( ;; )
  87.       {
  88.             while ((i += skip[((uchar *)string)[i]]) < 0)
  89.                   ;                           /* mighty fast inner loop */
  90.             if (i < (LARGE - stringlen))
  91.                   return NULL;
  92.             i -= LARGE;
  93.             j = patlen - 1;
  94.             s = (char *)string + (i - j);
  95.             while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j]))
  96.                   ;
  97.             if (j >= 0)
  98.                   i += skip2;
  99.             else  return s + j + 1;
  100.       }
  101. }
  102.